This code imports CodePoint Open into a PostGIS database, using the files provided by OS https://www.ordnancesurvey.co.uk/business-and-government/products/code-point-open.html


In [1]:
import pandas as pd

In [2]:
codepoint_dir = r"/Users/robinlinacre/Downloads/codepo_gb"

In [3]:
# Get column names
column_headers_df = pd.read_csv("/Users/robinlinacre/Downloads/codepo_gb/Doc/Code-Point_Open_Column_Headers.csv")
headers = column_headers_df.loc[0]
headers = [h.lower() for h in list(headers)]
headers


Out[3]:
['postcode',
 'positional_quality_indicator',
 'eastings',
 'northings',
 'country_code',
 'nhs_regional_ha_code',
 'nhs_ha_code',
 'admin_county_code',
 'admin_district_code',
 'admin_ward_code']

In [4]:
# Iterate through the CSVs in codepoint open concatenating them together into one big table
import os 
files = os.listdir(os.path.join(codepoint_dir,"Data/CSV"))

dfs = []
for f in files:
    this_file = os.path.join(codepoint_dir,"Data/CSV", f)
    if ".csv" in this_file:
        this_df = pd.read_csv(this_file, header=None)
        dfs.append(this_df)

In [5]:
final_df = pd.concat(dfs)
final_df.columns = headers
final_df.head()


Out[5]:
postcode positional_quality_indicator eastings northings country_code nhs_regional_ha_code nhs_ha_code admin_county_code admin_district_code admin_ward_code
0 AB101AA 10 394251 806376 S92000003 NaN S08000020 NaN S12000033 S13002483
1 AB101AB 10 394235 806529 S92000003 NaN S08000020 NaN S12000033 S13002483
2 AB101AF 10 394181 806429 S92000003 NaN S08000020 NaN S12000033 S13002483
3 AB101AG 10 394251 806376 S92000003 NaN S08000020 NaN S12000033 S13002483
4 AB101AH 10 394371 806359 S92000003 NaN S08000020 NaN S12000033 S13002483

In [ ]:
from mylibrary.connections import engine, cursor, conn
final_df.to_sql("all_postcodes", engine, schema="tt_gh", if_exists="replace")


/Users/robinlinacre/anaconda/lib/python2.7/site-packages/sqlalchemy/dialects/postgresql/base.py:2505: SAWarning: Did not recognize type 'geometry' of column 'geom'
  (attype, name))

In [ ]:
# Create geometry column for the points including a spatial index for efficient querying

sql = """
SELECT AddGeometryColumn ('tt_gh', 'all_postcodes', 'geom', 27700, 'POINT', 2);
UPDATE tt_gh.all_postcodes SET geom = ST_GeomFromText('POINT(' || eastings || ' ' || northings || ')', 27700 );
CREATE INDEX idx_geom_all_postcodes_tt_gh ON tt.all_postcodes USING gist(geom);
"""

cursor.execute(sql)
conn.commit()